/*	SLR parser generator

given slr grammar in file `grammar', generate characteristic fsm for it
`grammar.c' is generated from cfsm to recognize the grammar
`grammar.o' gives description of state machine

*/


#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <system.h>

#define gen(p) fputs(p,output)

FILE *grammar, *output, *des;

int states[100], sprod[1000], sposn[500], togen[500],
    tsnext[500], tsposn[500], trn[500], sttype[100],
    nstates=0, nterms=0, nnonterms=0, ntogen=1, trans=0, trs[1000];

char *pptr, *prods[100], terms[50], nonterms[50], exnon[50], trc[1000];

void main()
{
   char o, ia[100], out[200], rt[50],
	*in, *tr, *aptr, *optr, *rtptr, nxtchr;

   int	check, prct, z, rules=0, rout, flag=0, nxt=0, cflag,
	i, stype, rdstate, reduce, start, tf, tt, ot;

   grammar=fopen("grammar","r"); output=fopen("grammar.c","w");
   des=fopen("grammar.o","w");

   if(grammar==NULL || output==NULL || des==NULL)
      {
      printf("file open failure\n"), 
      exit(1);
      }

   for(i=0; i<100; i++) prods[i]=malloc(10);

   *terms='\0'; *nonterms='\0';

   /**** get grammar from input file */

   for(;;)
      {
      if(fgets(ia,100,grammar)==0) break;
      in=ia;
      while(isspace(*in)) in++;

      tr=prods[rules++]; *tr=*in++;
      while(isspace(*in)) in++;

      if(!isupper(*tr) || *in++!='-' || *in++!='>')
	 printf("bad grammar file\n"),
	 fclose(output),
	 exit(0);

      while(isspace(*in)) in++;

      while(*tr++)
	 if((*tr=*in++)==' ') --tr;

      *(tr-2)='\0';
      }

   fclose(grammar);

   /**** show grammar */

   fprintf(des,"\ninput grammar : found %d productions ...\n\n",rules);

   for(i=0; i<rules; i++)
      fprintf(des,"  %c -> .%s. \n",*prods[i],prods[i]+1);

   /**** make list `nonterms' of non-terminals for which productions exist */

   for(i=0; i<rules; i++)
      {
      for(flag=0, check=0; check<nnonterms; check++)
	  if(prods[i][0]==nonterms[check]) flag=1;

      if(!flag) nonterms[nnonterms++]=prods[i][0];
      }

   nonterms[nnonterms]='\0';
   fprintf(des,"\nnon-terminals are .%s.\n",nonterms);

   /**** check that every rhs non-terminal has a production,list non-terminals */

   for(i=0; i<rules; i++)
      {
      tr=prods[i];
      while(*++tr)
	 {
	 flag=0;
	 if(isupper(*tr))
	    {
	    aptr=nonterms;
	    while(*aptr) if(*aptr++==*tr) flag=1;

	    if(!flag)
	       printf("can\'t find production for %c in rule %s\n",
							*tr,prods[rout]),
	       exit(1);
	    }
	 else
	    {
	    aptr=terms;
	    while(*aptr) if(*aptr++==*tr) flag=1;
	    if(!flag)
	       { nterms++; *aptr++=*tr; *aptr='\0';}
	    }
	 }
      }
   fprintf(des,"\nterminals are .%s.\n",terms);

   /**** find starting S rule ... usually the first, must be unique */

   for(start=-1, i=0; i<rules; i++)
      if(prods[i][0]=='S')
	 if(start==-1)
	    start=i;
	 else
	    printf("starting rule must be unique\n"),
	    exit(1);


   /**** expand the grammar into a characteristic fsm */

   tsnext[0]=0, tsposn[0]=0, tsnext[1]=-1, tsposn[1]=-1, togen[0]=0;

   while(ntogen>nstates)  /* while there are states to expand, do nstate'th */
      {
      if(nstates>0)
	 {
	 nxt=states[nstates-1];
	 while(sprod[nxt++]!=-1);
	 }
      else
	 nxt=0;

      states[nstates]=nxt; exnon[0]='\0';
      tf=togen[nstates]; tt=nxt;
      rdstate=0; reduce=0;

      /* copy unexpanded state to state table */
      while(((sprod[tt]=tsnext[tf])&(sposn[tt]=tsposn[tf]))+1) tt++, tf++;

      while(nxt<tt)  /* expand each following nterm until complete */
	 {
	 if(isupper(o=prods[sprod[nxt]][sposn[nxt]+1])) /* non-terminal next? */
	    {
	    aptr=exnon;

	    while(*aptr)
	       if(*aptr==o)
		  break;
	       else
		  aptr++;

	    if(*aptr=='\0')  /* then we must expand the nterm's productions */
	       {
	       *aptr++=o; *aptr='\0';

	       for(i=0; i<rules; i++)  /* insert prods for nterm */
		  if(prods[i][0]==o)
		     {
		     sprod[tt]=i;   sposn[tt++]=0;
		     sprod[tt]=-1;  sposn[tt]=-1;
		     }
	       }
	    }

	 nxt++;
	 }

      ssort(nstates); nxt=states[nstates++];
      tf=togen[ntogen-1]; while(tsnext[tf++]+1);

      /* now copy each branch out of the state into the unexp table and try it */
      while(sprod[nxt]!=-1)
	 {
	 nxtchr=prods[sprod[nxt]][sposn[nxt]+1];

	 tf=togen[ntogen-1];
	 while(tsnext[tf++]+1);
	 ot=tf; togen[ntogen]=tf;

	 if(nxtchr=='\0')
	    {
	    trc[trans]=(char)sposn[nxt];
	    trs[trans]=nxt++;
	    trn[trans++]=-2; reduce=1;
	    }
	 else
	    {
	    while((sprod[nxt]!=-1)&& (nxtchr==prods[sprod[nxt]][sposn[nxt]+1]))
	       {
	       tsnext[ot]=sprod[nxt];
	       tsposn[ot++]=sposn[nxt++]+1;
	       }

	    tsnext[ot]=-1; tsposn[ot]=-1;
	    trc[trans]=nxtchr; rdstate=1;

	    if(z=findstate(ntogen))
	       trn[trans++]=z;
	    else
	       trn[trans++]=ntogen++;
	    }
	 }

      trn[trans]=-1;
      trc[trans++]='0'+rdstate+reduce*2;
      }

   /**** list cfsm's transitions */

   tt=0;

   for(i=0; i<nstates; i++)
      {
      fprintf(des,"\nstate  %d	\n",i);
      while(trn[tt++]!=-1)
	 if(trn[tt-1]!=-2)
	    fprintf(des," %c %d \n",trc[tt-1],trn[tt-1]);
	 else
	    fprintf(des," $\n");

      stype=trc[tt-1]-'0'; sttype[i]=stype;
      if(stype==1) fprintf(des,"read\n");
      if(stype==2) fprintf(des,"reduce\n");
      if(stype==3) fprintf(des,"inadequate\n");
      }

   /**** generate the parser */

   tt=0;

   fputs("#include <stdio.h>\n#include <system.h>\n\n",output);
   fputs("#define push(a)\t*++sp=a;\n#define pop()",output);
   fputs("\t(sp==stack?(exit(0),*sp):*sp--)\n\n",output);
   fputs("char stack[1000],*sp,res[5000],ia[100],*in;\n\n",output);
   fputs("main()\n{\n\tint state,item,resn=0,finish=0;\n\n",output);
   fputs("\tsp=stack; gets(in=ia); push(0);",output);
   fputs("\n\n\tfor(;;)\n\t{\n\t\tstate=pop(); push(state);\n",output);
   fputs("\t\tif(finish) break;\n\n\t\tswitch(state)\n\t\t{\n",output);

   for(i=0; i<nstates; i++)	/* generate the cfsm's case statement */
      switch(sttype[i])
	 {
	 case 1: /* read state */
		fprintf(output,"\t\tcase %2d : push(item=*in++);\n",i);
		fprintf(output,"\n\t\t\tswitch(item){\n");

		while(trn[tt++]!=-1)
		   fprintf(output,"\t\t\t\tcase \'%c\':push(%d); break;\n",
				trc[tt-1],trn[tt-1]);

		fprintf(output,"\t\t\t\tdefault :");
		fprintf(output,"printf(\"parse failed\");exit(0);\n");
		fprintf(output,"\t\t\t} break;\n\n");
		break;

	 case 2: /* reduce state */
		fprintf(output,"\t\tcase %2d : ",i);

		for(ot=0; ot<trc[tt]; ot++)
		   fprintf(output,"pop();pop();");

		fprintf(output," *--in=\'%c\';\n",prods[sprod[states[i]]][0]);

		fprintf(output,"\t\t\tres[resn++]=%d;",sprod[states[i]]);

		if(prods[sprod[states[i]]][0]=='S')
		   fprintf(output," if(*in==\'S\') finish=1;");

		fprintf(output," break;\n\n");
		tt++;tt++;
		break;

	 case 3: /* inadequate state */
		tf=tt; rtptr=rt;
		while(trn[tf]!=-1)
		   if(trn[tf++]!=-2)
		      *rtptr++=trc[tf-1];
		   else
		      o=prods[z=sprod[trs[tf-1]]][0];

		*rtptr='\0'; rtptr=rt;
		fprintf(output,"\t\tcase %2d : if(*in==\'%c\'",i,*rtptr++);

		while(*rtptr)
		   fprintf(output,"||*in==\'%c\'",*rtptr++);

		fprintf(output,"){\n\t\t\t\tpush(item=*in++);");
		fprintf(output,"\n\n\t\t\t\tswitch(item){\n"); tf=tt;

		while(trn[tt++]!=-1)
		   if(trn[tt-1]!=-2)
		      fprintf(output,"\t\t\t\t\tcase \'%c\':push(%d); break;\n",
			trc[tt-1],trn[tt-1]);

		fprintf(output,"\t\t\t\tdefault :");
		fprintf(output,"printf(\"parse failed\");exit(0);}\n");
		fprintf(output,"\t\t\t\t} else {\n\t\t\t");

		while(trn[tf++]!=-2);

		for(ot=0; ot<trc[tf-1]; ot++)
		   fprintf(output,"pop();pop();");

		fprintf(output," *--in=\'%c\';\n",o);
		fprintf(output,"\t\t\t\tres[resn++]=%d; ",z);
		fprintf(output,"\n\t\t\t} break;\n");
		break;
	 }

   fprintf(output,"\n\t\t}\n\t}\n\n\tprintf(\"parsed: \");\n\tfor(item=0;");
   fprintf(output,"item<resn;item++)\n\t\tprintf(\" %%2d\",res[item]);\n}\n");

   fclose(des);
   fclose(output);
}


/**** sort the configuration set for a given state */

ssort(int st)
{
   int pstart,nstat,t,la,lb;

   for(nstat=0, pstart=states[st]; sprod[pstart++]!=-1; nstat++) ;

   if(nstat<2) return;
   pstart=states[st];

   for(la=0; la<nstat; la++)
      for(lb=pstart; lb<(pstart+nstat-1); lb++)

	 if(1000*(int)prods[sprod[lb]][sposn[lb]+1]+50*(int)sprod[lb]+sposn[lb]>
	    1000*(int)prods[sprod[lb+1]][sposn[lb+1]+1]+50*(int)sprod[lb+1]+sposn[lb+1])
	       {
	       t=sprod[lb]; sprod[lb]=sprod[lb+1]; sprod[lb+1]=t;
	       t=sposn[lb]; sposn[lb]=sposn[lb+1]; sposn[lb+1]=t;
	       }
}

/**** compare two unexpanded state sets */

int compare(int f1, int f2)
{
   if(f1==f2) return 0;
   f1=togen[f1]; f2=togen[f2];

   while(tsnext[f1]==tsnext[f2] && tsposn[f1++]==tsposn[f2++])
      if(tsnext[f1-1]==-1) return 1;

   return 0;
}

/**** find if state already exists */

int findstate(int state)
{
   int i;

   for(i=0; i<ntogen; i++)
      if(compare(state,i)) return i;

   return 0;
}
